/*
 * Protocol independent interface for MX
 */

#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#ifndef _WIN32
#include <poll.h>
#include <ctype.h>
#include <netinet/in.h>
#endif

#include "myriexpress.h"
#include "mx_raw.h"

#if !defined(MX_RAW_POLL_SUPPORTED) && !defined(_WIN32)
#include <pthread.h>
#include <sys/socket.h>
#endif

#include "libfma.h"
#include "libmyri.h"

/* number of entries in the static Myri[] table; valid nic_ids are below this */
#define MYRI_MX_MAX_NICS 8
/* size in bytes of the receive buffer handed to mx_raw_next_event() */
#define MYRI_MX_RX_BUF_LEN 1024
/* initial per-NIC count of raw sends allowed before sends get queued */
#define MYRI_MX_RAW_SEND_TOKENS 32

#ifdef MX_RAW_POLL_SUPPORTED
#define MYRI_MX_MAX_HANDLE 64		/* we want MX fd < this number */
#else
/* timeout value the raw thread passes to mx_raw_next_event() */
#define MYRI_MX_EVENT_TIMEOUT 1000	/* one second */
#endif

/*
 * One raw send that could not be issued immediately because no send
 * token was available.  Entries are linked on a NIC's raw_send_queue
 * and hold the arguments later passed to mx_raw_send().
 */
struct myri_mx_raw_send {
  struct myri_mx *mip;		/* NIC state this send belongs to */
  int port;			/* port argument for mx_raw_send() */
  void *route;			/* allocation holding the route bytes followed
				   by a copy of the data; freed after sending */
  int route_len;		/* length of the route in bytes */
  void *tx_buf;			/* points route_len bytes into 'route' */
  int tx_len;			/* length of the data in bytes */
  void *context;		/* caller context handed to mx_raw_send() */

  /* links for the circular doubly-linked send queue */
  struct myri_mx_raw_send *next;
  struct myri_mx_raw_send *prev;
};

/*
 * create a mapping for the counters we are interested in
 */
enum {
  MYRI_MX_CTR_BADCRC8,
  MYRI_MX_CTR_BADCRC32,
  MYRI_MX_CTR_COUNT
};

#ifdef MX_RAW_POLL_SUPPORTED
/*
 * Per-NIC state for MX builds that can poll on the raw endpoint handle.
 */
struct myri_mx {
  int mx_fd;			/* value returned by mx_raw_handle() */

  mx_raw_endpoint_t raw_ep;	/* raw endpoint, NULL once closed */
  unsigned int num_ports;	/* filled in by mx_raw_num_ports() */

  /* send tokens and queue of deferred sends; this variant has no lock */
  int raw_send_tokens;
  struct myri_mx_raw_send raw_send_queue;	/* list head (sentinel) */

  char *counter_labels;		/* count labels, MX_MAX_STR_LEN bytes each */
  uint32_t *mx_counters;	/* buffer for count counter values */
  /* position of each counter of interest, by [port][MYRI_MX_CTR_*]; -1 if
     not found */
  int counter_index[2][MYRI_MX_CTR_COUNT];
  uint32_t count;		/* number of counters reported by MX */
};

#else  /* ! MX_RAW_POLL_SUPPORTED */

/*
 * Per-NIC state for MX builds that cannot poll: a helper thread waits for
 * raw events and hands them to the caller through event_queue, writing a
 * byte to wr_fd for each event so the caller can wait on rd_fd.
 */
struct myri_mx {
  int rd_fd;			/* caller's end of the notification socket, -1
				   before the sockets are opened */
  int wr_fd;			/* raw thread's end of the notification socket */

  mx_raw_endpoint_t raw_ep;	/* raw endpoint, NULL once closed */
  unsigned int num_ports;	/* filled in by mx_raw_num_ports() */

  pthread_mutex_t event_queue_lock;	/* guards event_queue */
  struct myri_event event_queue;	/* list head (sentinel) of pending events */
  struct myri_event null_event;		/* returned when poll finds nothing ready */

  /* raw_send_queue_lock guards both raw_send_tokens and raw_send_queue */
  int raw_send_tokens;
  pthread_mutex_t raw_send_queue_lock;
  struct myri_mx_raw_send raw_send_queue;	/* list head (sentinel) */

  pthread_t raw_thread;
  int thread_active;		/* TRUE if thread running */

  int done;			/* set to tell waiter thread to exit */
  char *counter_labels;		/* count labels, MX_MAX_STR_LEN bytes each */
  uint32_t *mx_counters;	/* buffer for count counter values */
  /* position of each counter of interest, by [port][MYRI_MX_CTR_*]; -1 if
     not found */
  int counter_index[2][MYRI_MX_CTR_COUNT];
  uint32_t count;		/* number of counters reported by MX */
};
#endif

/* global variables for the myri interface */
static struct myri_mx Myri[MYRI_MX_MAX_NICS];	/* per-NIC state, indexed by nic_id */
static int MX_initted;				/* non-zero once mx_init() has succeeded */
#ifdef MX_RAW_POLL_SUPPORTED
/* nic_id recorded for each raw handle, indexed by the handle value */
static int Mx_fd_to_id[MYRI_MX_MAX_HANDLE];
#endif

/*
 * local prototypes
 */
#ifndef MX_RAW_POLL_SUPPORTED
/* helpers that exist only in the thread + socket variant */
static void myri_mx_enqueue_event(struct myri_mx *mip,
                                 struct myri_event *mep);
static void *myri_mx_raw_thread(void *arg);
static int myri_open_sockets(struct myri_mx *mip);
static void myri_close_sockets(struct myri_mx *mip);
#endif
/* send-queue helpers shared by both variants */
static void myri_mx_do_raw_send(struct myri_mx_raw_send *sp);
static void myri_mx_release_raw_send_token(struct myri_mx *mip);

/* conversion helpers */
static void mx_macaddr_to_nic_id(lf_mac_addr_t mac_address, uint64_t *nic_id);
static int myri_mx_convert_status(mx_raw_status_t mxstat);

/****************************************************************
 *       versions of myri routines for MX that can poll
 ****************************************************************/

#ifdef MX_RAW_POLL_SUPPORTED

/*
 * Instantiate the myri_ interface for MX
 *
 * Calls mx_init() the first time through, opens the raw endpoint for
 * board nic_id, reads its port count and records its raw handle.
 *
 * Returns nic_id (the handle for later myri_ calls) on success and -1 on
 * failure.  errno is EINVAL for an out-of-range nic_id, and EBUSY, EPERM
 * or ENODEV when the raw endpoint cannot be opened.  The process exits
 * if mx_init() fails.
 */
int
myri_open(
  int nic_id)
{
  struct myri_mx *mip;
  mx_return_t mxrc;

  /* call mx_init() once */
  if (MX_initted == 0) {
    putenv("MX_ERRORS_ARE_FATAL=0");
    mxrc = mx_init();
    if (mxrc != MX_SUCCESS) {
       fprintf(stderr, "mx_init(): %s\n", mx_strerror(mxrc));
       exit (1);
    }
    ++MX_initted;
  }

  /* validate the NIC ID; a negative value would index before Myri[] */
  if (nic_id < 0 || nic_id >= MYRI_MX_MAX_NICS) {
    fprintf(stderr, "Bad nic_id: %d\n", nic_id);
    errno = EINVAL;
    return -1;
  }

  mip = &Myri[nic_id];

  /* initialize raw send tokens and raw send list (empty circular list) */
  mip->raw_send_tokens = MYRI_MX_RAW_SEND_TOKENS;
  mip->raw_send_queue.next = &mip->raw_send_queue;
  mip->raw_send_queue.prev = &mip->raw_send_queue;

  /* open the raw MX interface */
  mxrc = mx_raw_open_endpoint(nic_id, NULL, 0, &mip->raw_ep);
  if (mxrc != MX_SUCCESS) {
    if (mxrc == MX_BUSY) {
      errno = EBUSY;
    } else if (mxrc == MX_NO_PERM) {
      errno = EPERM;
    } else {
      errno = ENODEV;
    }
    return -1;
  }

  /* get number of ports */
  mxrc = mx_raw_num_ports(mip->raw_ep, &mip->num_ports);
  if (mxrc != MX_SUCCESS) {
    goto error;
  }

  /*
   * Get a file descriptor for this raw endpoint.  It is used as an index
   * into Mx_fd_to_id[], so anything outside [0, MYRI_MX_MAX_HANDLE) is
   * rejected.
   */
  mip->mx_fd = mx_raw_handle(mip->raw_ep);
  if (mip->mx_fd < 0) {
    perror("Error getting MX raw file handle");
    goto error;
  }
  if (mip->mx_fd >= MYRI_MX_MAX_HANDLE) {
    /* errno carries no information here, so do not use perror() */
    fprintf(stderr, "MX raw handle too large: %d\n", mip->mx_fd);
    goto error;
  }
  Mx_fd_to_id[mip->mx_fd] = nic_id;

  return nic_id;

 error:
  myri_close(nic_id);
  return -1;
}

/*
 * Close a myri instance
 *
 * Closes the raw endpoint of nic_id if one is open.  Out-of-range
 * nic_ids (including negative ones) are ignored.
 */
void
myri_close(
  int nic_id)
{
  struct myri_mx *mip;

  /* a negative id would index before Myri[] */
  if (nic_id < 0 || nic_id >= MYRI_MX_MAX_NICS) return;

  mip = &Myri[nic_id];

  if (mip->raw_ep != NULL) {
    mx_raw_close_endpoint(mip->raw_ep);
    mip->raw_ep = NULL;		/* makes a repeated close a no-op */
  }
}

/*
 * Return the file descriptor associated with a myri handle
 */
int
myri_fd(
  int nic_id)
{
  return Myri[nic_id].mx_fd;
}

/*
 * Get the next event from the MX thread
 */
int
myri_next_event(
  int nic_id,
  struct myri_event **rep,
  int timeout)
{
  struct myri_mx *mip;
  mx_raw_status_t stat;
  mx_return_t mxrc;
  struct myri_event *mep;
  static struct myri_event event_buf;
  static unsigned char rx_buf[MYRI_MX_RX_BUF_LEN];

  mip = &Myri[nic_id];
  mep = &event_buf;

  mep->d.raw_recv.rx_len = MYRI_MX_RX_BUF_LEN;	/* set RX len */

  /* get the next event */
  mxrc = mx_raw_next_event(mip->raw_ep,  &mep->d.raw_recv.port, 
			 &mep->d.raw_send.context,
			 rx_buf, &mep->d.raw_recv.rx_len,
			 timeout, &stat);


  /* If NIC is dead, return an event with some detail */
  if (mxrc == MX_NIC_DEAD) {
    mep->type = MYRI_EVENT_ERROR;
    mep->d.error.error = myri_mx_convert_status(stat);

  } else if (mxrc != MX_SUCCESS) {
    fprintf(stderr,"mxrc = %d, Error getting MX raw event\n", mxrc);
    return -1;

  } else switch (stat) {
    case MX_RAW_RECV_COMPLETE:

      mep->type = MYRI_EVENT_RAW_RECV_COMPLETE;
      mep->d.raw_recv.rxbuf = rx_buf;
      break;

    case MX_RAW_SEND_COMPLETE:
      
      mep->type = MYRI_EVENT_RAW_SEND_COMPLETE;
      myri_mx_release_raw_send_token(mip);
      break;

    default:
      break;
  }

  *rep = mep;
  return 0;
}

/*
 * Called when a raw send has completed.  If a deferred send is waiting,
 * issue the oldest one (the token is handed straight to it); otherwise
 * give the token back by incrementing raw_send_tokens.
 */
static void
myri_mx_release_raw_send_token(
  struct myri_mx *mip)
{
  struct myri_mx_raw_send *head;
  struct myri_mx_raw_send *oldest;

  head = &mip->raw_send_queue;
  oldest = head->prev;		/* new entries go in at head->next */

  /* queue empty: just return the token */
  if (oldest == head) {
    mip->raw_send_tokens += 1;
    return;
  }

  /* take the entry off the list, send it, then release its storage */
  oldest->next->prev = oldest->prev;
  oldest->prev->next = oldest->next;

  myri_mx_do_raw_send(oldest);

  free(oldest->route);
  free(oldest);
}

/*
 * Release an event struct returned by myri_next_event.
 * Nothing to do in this variant: the function body performs no work.
 */
void
myri_release_event(
  struct myri_event *mep)
{
  (void)mep;	/* intentionally unused */
}

/*
 * Perform a raw send
 *
 * nic_id    - handle returned by myri_open() (not range-checked)
 * port      - port to send from
 * route     - route bytes, route_len of them
 * txbuf     - data to send, txlen bytes
 * context   - opaque value passed to mx_raw_send()
 *
 * If a send token is available the send is handed to MX right away.
 * Otherwise the route and data are copied and queued; the queued send is
 * issued by myri_mx_release_raw_send_token() when an earlier send
 * completes.
 *
 * Returns 0 on success, -1 if mx_raw_send() fails or memory for a queued
 * send cannot be allocated.
 */
int
myri_raw_send(
  int nic_id,
  int port,
  void *route,
  int route_len,
  void *txbuf,
  int txlen,
  void *context)
{
  struct myri_mx *mip;
  struct myri_mx_raw_send *sp;
  int ret;

  ret = 0;		/* default to good return */
  mip = &Myri[nic_id];

  /* if we are free to send, do it, else queue this send */
  if (mip->raw_send_tokens > 0) {
    mx_return_t mxrc;

    --mip->raw_send_tokens;

    /* no copy is made here; the original notes that MX copies the data */
    mxrc = mx_raw_send(mip->raw_ep, port, route, route_len,
	               txbuf, txlen, context);
    if (mxrc != MX_SUCCESS) {
      fprintf(stderr, "Error %d from mx_raw_send\n", mxrc);
      /*
       * The send was not accepted, so no completion will arrive to give
       * the token back; return it here or it is lost for good.
       */
      ++mip->raw_send_tokens;
      ret = -1;
      goto leave;
    }

  /* no raw send tokens available, queue this send */
  } else {

    /* allocate a zeroed send queue entry */
    sp = (struct myri_mx_raw_send *) calloc(1, sizeof(*sp));
    if (sp == NULL) {
      perror("Allocating send queue entry");
      ret = -1;
      goto leave;
    }

    /* one allocation holds the route followed by the data */
    sp->route = malloc(route_len + txlen);
    if (sp->route == NULL) {
      free(sp);
      perror("Allocating send buffer");
      ret = -1;
      goto leave;
    }
    memcpy(sp->route, route, route_len);
    memcpy((char*)sp->route+route_len, txbuf, txlen);

    /* fill in the rest */
    sp->mip = mip;
    sp->port = port;
    sp->route_len = route_len;
    sp->tx_buf = (char*)sp->route + route_len;
    sp->tx_len = txlen;
    sp->context = context;

    /* link in at the head; the oldest entry is taken from the tail */
    sp->next = mip->raw_send_queue.next;
    sp->prev = &mip->raw_send_queue;

    sp->next->prev = sp;
    sp->prev->next = sp;
  }

 leave:
  return ret;
}

#else /* !MX_RAW_POLL_SUPPORTED */

/****************************************************************
 *       versions of myri routines for MX that can NOT poll
 ****************************************************************/

/*
 * Instantiate the myri_ interface for MX
 *
 * Calls mx_init() the first time through, opens the raw endpoint for
 * board nic_id, creates the notification socket pair and starts the raw
 * thread that feeds the event queue.
 *
 * Returns nic_id (the handle for later myri_ calls) on success and -1 on
 * failure.  errno is EINVAL for an out-of-range nic_id, and EBUSY, EPERM
 * or ENODEV when the raw endpoint cannot be opened.  The process exits
 * if mx_init() fails.
 */
int
myri_open(
  int nic_id)
{
  struct myri_mx *mip;
  mx_return_t mxrc;
  int rc;

  /* call mx_init() once */
  if (MX_initted == 0) {
    putenv("MX_ERRORS_ARE_FATAL=0");
    mxrc = mx_init();
    if (mxrc != MX_SUCCESS) {
       fprintf(stderr, "mx_init(): %s\n", mx_strerror(mxrc));
       exit (1);
    }
    ++MX_initted;
  }

  /* validate the NIC ID; a negative value would index before Myri[] */
  if (nic_id < 0 || nic_id >= MYRI_MX_MAX_NICS) {
    fprintf(stderr, "Bad nic_id: %d\n", nic_id);
    errno = EINVAL;
    return -1;
  }

  mip = &Myri[nic_id];
  mip->rd_fd = -1;
  mip->wr_fd = -1;
  mip->thread_active = 0;

  /*
   * myri_close() leaves done set; clear it so that a NIC which is opened
   * again gets a thread that actually runs its event loop.
   */
  mip->done = 0;

  /* empty the event queue */
  mip->event_queue.next = &mip->event_queue;
  mip->event_queue.prev = &mip->event_queue;

  /* initialize raw send tokens and raw send list */
  mip->raw_send_tokens = MYRI_MX_RAW_SEND_TOKENS;
  mip->raw_send_queue.next = &mip->raw_send_queue;
  mip->raw_send_queue.prev = &mip->raw_send_queue;

  /* fill in NULL event */
  mip->null_event.type = MYRI_EVENT_NO_EVENT;

  /* open the raw MX interface */
  mxrc = mx_raw_open_endpoint(nic_id, NULL, 0, &mip->raw_ep);
  if (mxrc != MX_SUCCESS) {
    if (mxrc == MX_BUSY) {
      errno = EBUSY;
    } else if (mxrc == MX_NO_PERM) {
      errno = EPERM;
    } else {
      errno = ENODEV;
    }
    return -1;
  }

  /* get number of ports */
  mxrc = mx_raw_num_ports(mip->raw_ep, &mip->num_ports);
  if (mxrc != MX_SUCCESS) {
    goto error;
  }

  /* set up the socket pair we will be using for notifying of events */
  rc = myri_open_sockets(mip);
  if (rc != 0) {
    perror("setting up pipe");
    goto error;
  }

  /* start the raw thread */
  pthread_mutex_init(&mip->event_queue_lock, NULL);
  pthread_mutex_init(&mip->raw_send_queue_lock, NULL);
  rc = pthread_create(&mip->raw_thread, NULL, myri_mx_raw_thread, mip);
  if (rc != 0) {
    /* pthread_create() returns the error number instead of setting errno */
    errno = rc;
    perror("Spawning raw thread");
    goto error;
  }
  mip->thread_active = 1;

  /* use the nic_id as a handle for future calls */
  return nic_id;

 error:
  myri_close(nic_id);
  return -1;
}

/*
 * Close a myri instance
 *
 * Stops the raw thread (if running), closes the notification sockets and
 * closes the raw endpoint.  Out-of-range nic_ids (including negative
 * ones) are ignored.
 */
void
myri_close(
  int nic_id)
{
  struct myri_mx *mip;

  /* a negative id would index before Myri[] */
  if (nic_id < 0 || nic_id >= MYRI_MX_MAX_NICS) return;

  mip = &Myri[nic_id];

  /*
   * Stop the thread before touching the sockets: the thread writes to
   * wr_fd for every event and exits the process if that write fails, so
   * the descriptors must stay open until it has been joined.
   */
  mip->done = 1;
  if (mip->thread_active) {
    pthread_join(mip->raw_thread, NULL);
    mip->thread_active = 0;
  }

  myri_close_sockets(mip);

  if (mip->raw_ep != NULL) {
    mx_raw_close_endpoint(mip->raw_ep);
    mip->raw_ep = NULL;
  }
}

/*
 * Return the file descriptor associated with a myri handle
 */
int
myri_fd(
  int nic_id)
{
  return Myri[nic_id].rd_fd;
}

/*
 * Get the next event from the MX thread
 *
 * nic_id  - handle returned by myri_open() (not range-checked)
 * rep     - receives a pointer to the event
 * timeout - if >= 0, the poll() timeout to wait for an event; if
 *           negative, no poll is done and the read below blocks
 *
 * Returns 0 with *rep set (to the NIC's null_event when nothing is
 * ready), or -1 on a poll/read failure.  Events other than the null
 * event were allocated by the raw thread and are released with
 * myri_release_event().
 */
int
myri_next_event(
  int nic_id,
  struct myri_event **rep,
  int timeout)
{
  struct myri_event *mep;
  struct myri_mx *mip;
  char junk;
  int rc;

  mip = &Myri[nic_id];

  /*
   * If a non-negative timeout is specified, poll on the socket for
   * that long and return NO_EVENT if timeout occurs
   */
  if (timeout >= 0) {
    struct pollfd pf;

    /* execute a poll() with appropriate timeout */
    pf.fd = mip->rd_fd;
    pf.events = POLLIN;
    pf.revents = 0;
    rc = poll(&pf, 1, timeout);

    if (rc == -1) {
      /* bad if error and not an interruption */
      if (errno != EINTR
#if HAVE_DECL_ERESTART
	  && errno != ERESTART
#endif
	  && errno != EAGAIN) {
	return -1;
      }

      /*
       * Interrupted: revents is not valid after a failed poll(), so do
       * not inspect it; just report that nothing is ready.
       */
      *rep = &mip->null_event;
      return 0;
    }

    /* return of 0 from poll means nothing is ready */
    if (rc == 0 || (pf.revents & POLLIN) == 0) {
      *rep = &mip->null_event;
      return 0;
    }
  }

  /* read one byte from the socket to ensure an event is ready */
  rc = recv(mip->rd_fd, &junk, 1, 0);
  if (rc != 1) return -1;

  /* lock while we manipulate the queue */
  pthread_mutex_lock(&mip->event_queue_lock);

  /* events are added at the head, so the oldest one is at the tail */
  mep = mip->event_queue.prev;
  if (mep == &mip->event_queue) {
    fprintf(stderr, "event queue unexpectedly empty!\n");
    pthread_mutex_unlock(&mip->event_queue_lock);
    exit(1);
  }

  /* unlink this element from the list */
  mep->next->prev = mep->prev;
  mep->prev->next = mep->next;

  /* all done with the queue */
  pthread_mutex_unlock(&mip->event_queue_lock);

  *rep = mep;
  return 0;
}

/*
 * Enqueue an event and let the main thread know about it.
 * The event is inserted at the head of the NIC's event queue under
 * event_queue_lock, then one byte is written to wr_fd.  Exits the
 * process if that write does not transfer exactly one byte.
 */
static void
myri_mx_enqueue_event(
  struct myri_mx *mip,
  struct myri_event *mep)
{
  static char wake_byte=0;	/* avoid uninitialized error from valgrind */
  struct myri_event *head;

  head = &mip->event_queue;

  /* insert right after the list head while holding the lock */
  pthread_mutex_lock(&mip->event_queue_lock);
  mep->prev = head;
  mep->next = head->next;
  head->next->prev = mep;
  head->next = mep;
  pthread_mutex_unlock(&mip->event_queue_lock);

  /* one byte per event wakes up the reader */
  if (send(mip->wr_fd, &wake_byte, 1, 0) != 1) {
    perror("Writing to pipe");
    exit(1);
  }
}

/*
 * raw communication thread
 *
 * arg is the struct myri_mx of the NIC to service.  The thread runs
 * until mip->done becomes non-zero or the NIC is reported dead, and
 * always returns NULL.  It exits the process on allocation failure or on
 * an unexpected mx_raw_next_event() error.
 *
 * NOTE(review): done is a plain int written by another thread without
 * synchronization - confirm this is acceptable on the target platforms.
 */
static void *
myri_mx_raw_thread(
  void *arg)
{
  struct myri_mx *mip;
  struct myri_event *mep;
  mx_raw_status_t stat;
  mx_return_t mxrc;
  void *rx_buf;

  mip = arg;
  rx_buf = NULL;
  mep = NULL;

  /*
   * Loop waiting for events.  Every time we get an event, we queue
   * a corresponding event for the main thread to read and write into
   * our end of the socket to indicate it's time to look for an event.
   */
  while (mip->done == 0) {

    /* get a struct to hold our next event */
    if (mep == NULL) {
      mep = (struct myri_event *) malloc(sizeof(*mep));
      if (mep == NULL) {
	perror("allocating raw event struct");
	exit(1);
      }
    }

    /* get a receive buffer now if we need one */
    if (rx_buf == NULL) {
      rx_buf = malloc(MYRI_MX_RX_BUF_LEN);
      if (rx_buf == NULL) {
	perror("allocating raw receive buffer");
	exit(1);
      }
    }

    mep->d.raw_recv.rx_len = MYRI_MX_RX_BUF_LEN;	/* set RX len */

    /* get the next event */
    mxrc = mx_raw_next_event(mip->raw_ep,  &mep->d.raw_recv.port,
			   &mep->d.raw_send.context,
	                   rx_buf, &mep->d.raw_recv.rx_len,
			   MYRI_MX_EVENT_TIMEOUT, &stat);

    /* If NIC is dead, enqueue an event with some detail */
    if (mxrc == MX_NIC_DEAD) {
      mep->type = MYRI_EVENT_ERROR;
      mep->d.error.error = myri_mx_convert_status(stat);
      myri_mx_enqueue_event(mip, mep);

      mep = NULL;		/* the event now belongs to the reader */
      free(rx_buf);
      return NULL;		/* exit the thread */

    } else if (mxrc != MX_SUCCESS) {
      fprintf(stderr,"mxrc = %d, Error getting MX raw event\n", mxrc);
      exit(1);

    } else switch (stat) {
      case MX_RAW_RECV_COMPLETE:

	/* both the event and the buffer are handed to the reader */
	mep->type = MYRI_EVENT_RAW_RECV_COMPLETE;
	mep->d.raw_recv.rxbuf = rx_buf;
	myri_mx_enqueue_event(mip, mep);

	mep = NULL;
	rx_buf = NULL;
	break;

      case MX_RAW_SEND_COMPLETE:

	/* only the event is handed over; rx_buf is reused next time */
	mep->type = MYRI_EVENT_RAW_SEND_COMPLETE;
	myri_mx_enqueue_event(mip, mep);

	myri_mx_release_raw_send_token(mip);

	mep = NULL;
	break;

      default:
	/* nothing to report; keep mep and rx_buf for the next pass */
	break;
    }
  }

  /* release whatever was allocated but never handed to the reader */
  free(mep);
  free(rx_buf);

  return NULL;
}

/*
 * Called when a raw send has completed.  Under raw_send_queue_lock: if a
 * deferred send is waiting, issue the oldest one (the token is handed
 * straight to it); otherwise give the token back by incrementing
 * raw_send_tokens.
 */
static void
myri_mx_release_raw_send_token(
  struct myri_mx *mip)
{
  struct myri_mx_raw_send *head;
  struct myri_mx_raw_send *oldest;

  head = &mip->raw_send_queue;

  pthread_mutex_lock(&mip->raw_send_queue_lock);

  oldest = head->prev;		/* new entries go in at head->next */
  if (oldest == head) {

    /* queue empty: just return the token */
    mip->raw_send_tokens += 1;

  } else {

    /* take the entry off the list, send it, then release its storage */
    oldest->next->prev = oldest->prev;
    oldest->prev->next = oldest->next;

    myri_mx_do_raw_send(oldest);

    free(oldest->route);
    free(oldest);
  }

  /* list manipulation and send are both done while locked */
  pthread_mutex_unlock(&mip->raw_send_queue_lock);
}

/*
 * Release an event struct returned by myri_next_event.
 * NO_EVENT events are left alone because they are not heap allocated;
 * for every other type the event is freed, and for a completed raw
 * receive its receive buffer is freed first.
 */
void
myri_release_event(
  struct myri_event *mep)
{
  /* NO_EVENT is passed via a static event buffer */
  if (mep->type != MYRI_EVENT_NO_EVENT) {

    if (mep->type == MYRI_EVENT_RAW_RECV_COMPLETE) {
      free(mep->d.raw_recv.rxbuf);
    }
    free(mep);
  }
}

/*
 * Perform a raw send
 *
 * nic_id    - handle returned by myri_open() (not range-checked)
 * port      - port to send from
 * route     - route bytes, route_len of them
 * txbuf     - data to send, txlen bytes
 * context   - opaque value passed to mx_raw_send()
 *
 * If a send token is available the send is handed to MX right away.
 * Otherwise the route and data are copied and queued; the queued send is
 * issued by myri_mx_release_raw_send_token() when an earlier send
 * completes.  Tokens and queue are manipulated under
 * raw_send_queue_lock.
 *
 * Returns 0 on success, -1 if mx_raw_send() fails or memory for a queued
 * send cannot be allocated.
 */
int
myri_raw_send(
  int nic_id,
  int port,
  void *route,
  int route_len,
  void *txbuf,
  int txlen,
  void *context)
{
  struct myri_mx *mip;
  struct myri_mx_raw_send *sp;
  int ret;

  ret = 0;		/* default to good return */
  mip = &Myri[nic_id];

  pthread_mutex_lock(&mip->raw_send_queue_lock);

  /* if we are free to send, do it, else queue this send */
  if (mip->raw_send_tokens > 0) {
    mx_return_t mxrc;

    --mip->raw_send_tokens;

    /* no copy is made here; the original notes that MX copies the data */
    mxrc = mx_raw_send(mip->raw_ep, port, route, route_len,
	               txbuf, txlen, context);
    if (mxrc != MX_SUCCESS) {
      fprintf(stderr, "Error %d from mx_raw_send\n", mxrc);
      /*
       * The send was not accepted, so no completion will arrive to give
       * the token back; return it here or it is lost for good.
       */
      ++mip->raw_send_tokens;
      ret = -1;
      goto leave;
    }

  /* no raw send tokens available, queue this send */
  } else {

    /* allocate a zeroed send queue entry */
    sp = (struct myri_mx_raw_send *) calloc(1, sizeof(*sp));
    if (sp == NULL) {
      perror("Allocating send queue entry");
      ret = -1;
      goto leave;
    }

    /* one allocation holds the route followed by the data */
    sp->route = malloc(route_len + txlen);
    if (sp->route == NULL) {
      free(sp);
      perror("Allocating send buffer");
      ret = -1;
      goto leave;
    }
    memcpy(sp->route, route, route_len);
    memcpy((char*)sp->route+route_len, txbuf, txlen);

    /* fill in the rest */
    sp->mip = mip;
    sp->port = port;
    sp->route_len = route_len;
    sp->tx_buf = (char*)sp->route + route_len;
    sp->tx_len = txlen;
    sp->context = context;

    /* link in at the head; the oldest entry is taken from the tail */
    sp->next = mip->raw_send_queue.next;
    sp->prev = &mip->raw_send_queue;

    sp->next->prev = sp;
    sp->prev->next = sp;
  }

 leave:
  pthread_mutex_unlock(&mip->raw_send_queue_lock);

  return ret;
}

/*
 * Create the connected pair of TCP sockets used to notify the caller of
 * new events: the raw thread writes to mip->wr_fd and the caller reads
 * from mip->rd_fd.
 *
 * A temporary listening socket is bound to an ephemeral port on the
 * loopback address only, so the pair cannot be reached from other hosts
 * and the connect below has a well-defined destination.
 *
 * Returns 0 on success.  On failure returns -1 with every socket opened
 * here closed again and mip->wr_fd set to -1.
 */
static int
myri_open_sockets(
  struct myri_mx *mip)
{
  int sock;
  struct sockaddr_in addr = {0};
  socklen_t len;
  int rc;

  sock = socket(PF_INET, SOCK_STREAM, 0);
  if (sock == -1) {
    perror("opening socket");
    goto abort_with_nothing;
  }
  addr.sin_family = AF_INET;
  addr.sin_port = 0;				/* let the system pick a port */
  addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  rc = bind(sock, (struct sockaddr*)&addr, sizeof(addr));
  if (rc == -1) {
    perror("binding socket");
    goto abort_with_sock;
  }
  rc = listen(sock, 1);
  if (rc == -1) {
    perror("listening");
    goto abort_with_sock;
  }
  mip->wr_fd = socket(PF_INET, SOCK_STREAM, 0);
  if (mip->wr_fd == -1) {
    perror("opening writer");
    goto abort_with_sock;
  }

  /* find out which port was assigned, then connect the writer to it */
  len = sizeof(addr);
  rc = getsockname(sock, (struct sockaddr*)&addr, &len);
  if (rc == -1) {
    perror("getting sock name");
    goto abort_with_writer;
  }
  rc = connect(mip->wr_fd, (struct sockaddr*)&addr, sizeof(addr));
  if (rc == -1) {
    perror("connecting writer");
    goto abort_with_writer;
  }
  mip->rd_fd = accept(sock, NULL, NULL);
  if (mip->rd_fd == -1) {
    perror("connecting reader");
    goto abort_with_writer;
  }

  /* the listener is no longer needed once the pair is connected */
  close(sock);
  return 0;

 abort_with_writer:
  close(mip->wr_fd);
  mip->wr_fd = -1;		/* do not leave a stale descriptor behind */
 abort_with_sock:
  close(sock);
 abort_with_nothing:
  return -1;
}

/*
 * Close the notification sockets, if they are open.
 * rd_fd == -1 is the marker for "not open".  Both descriptors are reset
 * to -1 afterwards so that calling this again cannot close descriptors
 * that have since been reused for something else.
 */
static void
myri_close_sockets(
  struct myri_mx *mip)
{
  if (mip->rd_fd != -1) {
    close(mip->rd_fd);
    close(mip->wr_fd);
    mip->rd_fd = -1;
    mip->wr_fd = -1;
  }
}

#endif

/*
 * Perform the guts of a raw send
 */
static void
myri_mx_do_raw_send(
  struct myri_mx_raw_send *sp)
{
  mx_return_t mxrc;

  mxrc = mx_raw_send(sp->mip->raw_ep, sp->port, sp->route, sp->route_len,
	             sp->tx_buf, sp->tx_len, sp->context);
  if (mxrc != MX_SUCCESS) {
    fprintf(stderr, "Error from mx_raw_send\n");
    exit(1);
  }
}

/*
 * Get the hostname of a remote host.  If the hostname is returned as ending
 * in ":X" (X a single digit) then trim the ":X" and use X as the remote
 * nic_id, else the remote nic_id is 0.
 *
 * nic_id      - not used by this implementation
 * mac_addr    - MAC address of the remote NIC
 * hostname    - receives the hostname
 * his_nic_id  - if not NULL, receives the remote nic_id
 *
 * Returns 0 on success, -1 if the lookup fails or yields an empty name.
 */
int
myri_mac_to_hostname(
  int nic_id,
  lf_mac_addr_t mac_addr,
  lf_string_t hostname,
  int *his_nic_id)
{
  uint64_t mx_mac;
  mx_return_t mxrc;
  int hisid;
  int len;
  int i;

  /* convert MAC address into a 64-bit int, first byte most significant */
  mx_mac = 0;
  for (i=0; i<6; ++i) {
    mx_mac <<= 8;
    mx_mac |= (uint64_t)mac_addr[i] & 0xff;
  }

  /* get the hostname */
  mxrc = mx_nic_id_to_hostname(mx_mac, hostname);
  if (mxrc != MX_SUCCESS) {
    return -1;
  }

  len = strlen(hostname);

  /* Treat an empty hostname as a failure */
  if (len == 0) {
    return -1;
  }

  /*
   * Check for trailing ":X" to get his_nic_id.  The suffix needs two
   * characters, so a one-character name must not be inspected at
   * hostname[len-2].
   */
  if (len >= 2 && hostname[len-2] == ':'
      && isdigit((unsigned char)hostname[len-1])) {
    hisid = hostname[len-1] - '0';
    hostname[len-2] = '\0';
  } else {
    hisid = 0;
  }
  if (his_nic_id != NULL) {
    *his_nic_id = hisid;
  }
  return 0;
}

/*
 * Return information about this NIC
 */
int
myri_get_nic_info(
  int nic_id,
  struct myri_nic_info *nip)
{
  uint64_t mx_mac;
  mx_return_t mxrc;
  char tmpstr[128];
  uint32_t ser_no;
  struct myri_mx *mip;
  int i;
  uint32_t num_ports;
#ifdef MX_HAS_RAW_LINE_SPEED
  mx_line_speed_t speed;
#endif

  /* use the board number to get the MAC address */
  mxrc = mx_board_number_to_nic_id(nic_id, &mx_mac);
  if (mxrc != MX_SUCCESS) {
    return -1;
  }

  /* convert the MX NIC ID into a MAC address */
  for (i=0; i<6; ++i) {
    nip->mac_addr[5-i] = mx_mac & 0xFF;
    mx_mac >>= 8;
  }

  /* product ID */
  mxrc = mx_get_info(NULL, MX_PRODUCT_CODE, &nic_id, sizeof(nic_id),
                     tmpstr, sizeof(tmpstr));
  if (mxrc != MX_SUCCESS) {
    return -1;
  }

  if (strcmp(tmpstr, "unknown") == 0) {
    fprintf(stderr, "Unable to read Product ID from NIC %d\n", nic_id);
  }

  strncpy(nip->product_id, tmpstr, LF_SHORT_STRING_LEN-1);

  /* Number of interfaces */
  mip = &Myri[nic_id];
  mxrc = mx_raw_num_ports(mip->raw_ep, &num_ports);
  if (mxrc != MX_SUCCESS) {
    return -1;
  }
  nip->num_ports = num_ports;

  /* get line speed */
#ifdef MX_HAS_RAW_LINE_SPEED
  mxrc = mx_raw_line_speed(mip->raw_ep, &speed);
  if (mxrc != MX_SUCCESS) {
    return -1;
  }
  switch (speed) {
  case MX_SPEED_2G:
    nip->speed = MYRI_SPEED_2G;
    break;
  case MX_SPEED_10G:
    nip->speed = MYRI_SPEED_10G;
    break;
  }
#else
  nip->speed = MYRI_SPEED_2G;		/* default to 2G in older versions */
#endif

  /* serial number */
  mxrc = mx_get_info(NULL, MX_SERIAL_NUMBER, &nic_id, sizeof(nic_id),
                     &ser_no, sizeof(ser_no));
  if (mxrc != MX_SUCCESS) {
    return -1;
  }

  /* Supported number of routes to each destination */
  nip->num_routes = 8;
  nip->num_routes_is_per_port = TRUE;

  sprintf(nip->serial_no, "%u", ser_no);

  return 0;
}

/*
 * Called before setting a bunch of routes.
 * Returns 0 if mx_raw_set_route_begin() succeeds; otherwise sets errno
 * to EBADF and returns -1.  nic_id is not range-checked.
 */
int
myri_set_route_start(
  int nic_id)
{
  if (mx_raw_set_route_begin(Myri[nic_id].raw_ep) != MX_SUCCESS) {
    errno = EBADF;
    return -1;
  }

  return 0;
}

/*
 * Pack a 6-byte MAC address into a 64-bit MX NIC ID, with
 * mac_address[0] as the most significant of the six bytes.
 *
 * All arithmetic is done in uint64_t so that no byte is ever shifted
 * into the sign bit of an int.
 */
static void
mx_macaddr_to_nic_id(
  lf_mac_addr_t mac_address,
  uint64_t *nic_id)
{
  uint64_t id;
  int i;

  id = 0;
  for (i=0; i<6; ++i) {
    id = (id << 8) | ((uint64_t)mac_address[i] & 0xff);
  }
  *nic_id = id;
}


/*
 * Called to commit a set of routes.
 * Ends the route update on the raw endpoint, then records the mapper ID,
 * map version and host count on every port of the NIC.
 *
 * Returns 0 on success.  Returns -1 with errno EBADF if ending the route
 * update fails, or with errno EINVAL if setting the map version fails on
 * any port.  nic_id is not range-checked.
 */
int
myri_set_route_end(
  int nic_id,
  lf_mac_addr_t mapper_mac,
  int map_version,
  int num_hosts)
{
  struct myri_mx *mip = &Myri[nic_id];
  uint64_t mapper_id;
  int port;

  if (mx_raw_set_route_end(mip->raw_ep) != MX_SUCCESS) {
    errno = EBADF;
    return -1;
  }

  mx_macaddr_to_nic_id(mapper_mac, &mapper_id);

  /* apply the same map information to each port in turn */
  port = 0;
  while (port < mip->num_ports) {
    if (mx_raw_set_map_version(mip->raw_ep, port, mapper_id,
			       map_version, num_hosts, 1) != MX_SUCCESS) {
      errno = EINVAL;
      return -1;
    }
    ++port;
  }

  return 0;
}

/*
 * set a route
 *
 * Packs remote_mac into a 64-bit value (first byte most significant),
 * maps the firmware type onto an MX host type (0 when there is no
 * match) and passes everything to mx_raw_set_route().  route_num is not
 * used here.
 *
 * Returns 0 on success; otherwise sets errno to EINVAL and returns -1.
 * nic_id is not range-checked.
 */
int
myri_set_route(
  int nic_id,
  lf_mac_addr_t remote_mac,
  enum lf_firmware_type fw_type,
  int remote_port,
  int route_num,
  int local_port,
  unsigned char *route,
  int route_len)
{
  struct myri_mx *mip = &Myri[nic_id];
  uint64_t packed_mac = 0;
  int host_type = 0;		/* used when fw_type matches nothing below */
  int idx;

  for (idx=0; idx<sizeof(lf_mac_addr_t); ++idx) {
    packed_mac = (packed_mac << 8) | remote_mac[idx];
  }

  switch (fw_type) {
    case MYRI_FW_GM_1:
    case MYRI_FW_GM_2:
      host_type = MX_HOST_GM;
      break;
    case MYRI_FW_MX_1:
      host_type = MX_HOST_MX;
      break;
    case MYRI_FW_XM_1:
    case MYRI_FW_2Z_1:
      host_type = MX_HOST_XM;
      break;
    default:
      break;
  }

  if (mx_raw_set_route(mip->raw_ep, packed_mac, route, route_len,
		       remote_port, local_port, host_type) != MX_SUCCESS) {
    errno = EINVAL;
    return -1;
  }

  return 0;
}

/*
 * Get the hostname for a NIC.
 * Looks up the NIC ID of board nic_id, then the hostname registered for
 * it, and copies that name into 'hostname'.
 *
 * Returns 0 on success and -1 if either lookup fails.  A name longer
 * than LF_STRING_LEN-1 characters is rejected with errno ENOMEM.
 */
int
myri_get_hostname(
  int nic_id,
  char *hostname)
{
  char name[MX_MAX_STR_LEN+1];
  uint64_t board_mac;

  /* board number -> NIC ID -> hostname; stop at the first failure */
  if (mx_board_number_to_nic_id(nic_id, &board_mac) != MX_SUCCESS
      || mx_nic_id_to_hostname(board_mac, name) != MX_SUCCESS) {
    return -1;
  }

  /* refuse names that are too long */
  if (strlen(name) > LF_STRING_LEN-1) {
    errno = ENOMEM;
    return -1;
  }

  strcpy(hostname, name);
  return 0;
}

/*
 * Set the hostname for a NIC.
 * Passes the name to mx_raw_set_hostname() on the NIC's raw endpoint;
 * the result of that call is not examined and 0 is always returned.
 * nic_id is not range-checked.
 */
int
myri_set_hostname(
  int nic_id,
  char *hostname)
{
  mx_raw_set_hostname(Myri[nic_id].raw_ep, hostname);

  return 0;
}

/*
 * Set the default hostname for a NIC.
 * The default name is "<hostname>:<nic_id>", formatted into an
 * lf_string_t with snprintf().  Returns whatever myri_set_hostname()
 * returns.
 */
int
myri_set_dflt_hostname(
  int nic_id,
  char *hostname)
{
  lf_string_t full_name;

  snprintf(full_name, sizeof(full_name), "%s:%d", hostname, nic_id);

  return myri_set_hostname(nic_id, full_name);
}

/*
 * Return counters for a NIC port
 *
 * int_nic_id - handle returned by myri_open() (not range-checked)
 * port       - port whose counters are wanted
 * counters   - receives the counters; badcrcs is the sum of the port's
 *              "Bad CRC8" and "Bad CRC32" counters that were found
 *
 * The first call for a NIC fetches the counter labels from MX and
 * remembers where the counters of interest are.
 *
 * Returns 0 on success, -1 on failure.  errno is EINVAL for a bad port
 * and EIO when an MX query fails.
 */
int
myri_get_nic_counters(
  int int_nic_id,
  int port,
  struct myri_nic_counters *counters)
{
  struct myri_mx *mip;
  uint32_t nic_id;
  mx_return_t mxrc;
  unsigned int max_ports;
  unsigned int p;
  char *clp;

  nic_id = int_nic_id;

  mip = &Myri[nic_id];

  /* counter_index[][] only has room for this many ports */
  max_ports = sizeof(mip->counter_index) / sizeof(mip->counter_index[0]);

  /*
   * Make sure port in range, both for the NIC and for counter_index[][]
   */
  if (port < 0 || (unsigned int)port >= mip->num_ports
      || (unsigned int)port >= max_ports) {
    errno = EINVAL;
    return -1;
  }

  /* get counter labels and find ones of interest first time through */
  if (mip->counter_labels == NULL) {
    uint32_t c;
    int i;

    /* figure out how many counters there are */
    mxrc =  mx_get_info(NULL, MX_COUNTERS_COUNT, &nic_id, sizeof (nic_id),
	                    &mip->count, sizeof (mip->count));
    if (mxrc != MX_SUCCESS) {
      errno = EIO;
      return -1;
    }

    /* create space to save the labels */
    mip->counter_labels = (char *) malloc(mip->count * MX_MAX_STR_LEN);
    if (mip->counter_labels == NULL) goto error;

    /* and the values; check the pointer that was just allocated */
    mip->mx_counters = (uint32_t *) malloc(mip->count * sizeof(uint32_t));
    if (mip->mx_counters == NULL) goto error_with_counter_labels;

    /* get the names of the counters */
    mxrc = mx_get_info(NULL, MX_COUNTERS_LABELS, &nic_id, sizeof (nic_id),
                         mip->counter_labels, MX_MAX_STR_LEN * mip->count);
    if (mxrc != MX_SUCCESS) {
      errno = EIO;
      goto error_with_mx_counters;
    }

    /* initialize counter index array */
    for (i=0; i<MYRI_MX_CTR_COUNT; ++i) {
      mip->counter_index[0][i] = -1;
      mip->counter_index[1][i] = -1;
    }

    /* find the counters we care about; labels are MX_MAX_STR_LEN apart */
    clp = mip->counter_labels;
    for (c=0; c<mip->count; ++c) {
      if (strcmp(clp, "Bad CRC8 (Port 0)") == 0) {
	mip->counter_index[0][MYRI_MX_CTR_BADCRC8]= (int)c;

      } else if (strcmp(clp, "Bad CRC8 (Port 1)") == 0) {
	mip->counter_index[1][MYRI_MX_CTR_BADCRC8]= (int)c;

      } else if (strcmp(clp, "Bad CRC32 (Port 0)") == 0) {
	mip->counter_index[0][MYRI_MX_CTR_BADCRC32]= (int)c;

      } else if (strcmp(clp, "Bad CRC32 (Port 1)") == 0) {
	mip->counter_index[1][MYRI_MX_CTR_BADCRC32]= (int)c;
      }

      clp += MX_MAX_STR_LEN;
    }

    /* Check that all appropriate counters are found */
    for (p=0; p<mip->num_ports && p<max_ports; ++p) {
      for (i=0; i<MYRI_MX_CTR_COUNT; ++i) {
	if (mip->counter_index[p][i] == -1) {
	  fprintf(stderr, "Warning: Unable to locate proper counters!\n");
	}
      }
    }
  }

  /*
   * Get the current counter values and copy them in
   */
  mxrc = mx_get_info(NULL, MX_COUNTERS_VALUES, &nic_id, sizeof (nic_id),
		     mip->mx_counters, sizeof(uint32_t) * mip->count);
  if (mxrc != MX_SUCCESS) {
    errno = EIO;
    goto error;
  }

  /* badcrc count; counters that were not found contribute nothing */
  counters->badcrcs = 0;
  if (mip->counter_index[port][MYRI_MX_CTR_BADCRC8] != -1) {
    counters->badcrcs +=
      mip->mx_counters[mip->counter_index[port][MYRI_MX_CTR_BADCRC8]];
  }
  if (mip->counter_index[port][MYRI_MX_CTR_BADCRC32] != -1) {
    counters->badcrcs +=
      mip->mx_counters[mip->counter_index[port][MYRI_MX_CTR_BADCRC32]];
  }

  return 0;

  /* resetting the pointers makes the next call retry the set-up */
 error_with_mx_counters:
   free(mip->mx_counters);
   mip->mx_counters = NULL;
 error_with_counter_labels:
   free(mip->counter_labels);
   mip->counter_labels = NULL;
 error:
  return -1;
}

/*
 * Return firmware code
 */
enum lf_firmware_type
myri_firmware_type()
{
  return MYRI_FW_MX_1;
}

/*
 * convert an MX status to a generic myri error code
 *
 * The four timeout statuses map to MYRI_ERROR_FIRMWARE_UNRESPONSIVE, the
 * two SRAM parity statuses to the soft and hard SRAM errors, and
 * anything else to MYRI_ERROR_UNKNOWN.
 */
static int
myri_mx_convert_status(
  mx_raw_status_t mxstat)
{
  int err;

  err = MYRI_ERROR_UNKNOWN;	/* result for statuses not listed below */

  switch (mxstat) {
    case MX_DEAD_COMMAND_TIMEOUT:
    case MX_DEAD_ENDPOINT_CLOSE_TIMEOUT:
    case MX_DEAD_ROUTE_UPDATE_TIMEOUT:
    case MX_DEAD_WATCHDOG_TIMEOUT:
      err = MYRI_ERROR_FIRMWARE_UNRESPONSIVE;
      break;

    case MX_DEAD_RECOVERABLE_SRAM_PARITY_ERROR:
      err = MYRI_ERROR_SOFT_SRAM_ERROR;
      break;

    case MX_DEAD_SRAM_PARITY_ERROR:
      err = MYRI_ERROR_HARD_SRAM_ERROR;
      break;

    default:
      break;
  }

  return err;
}

/*
 * set info for NIC auto-reply to scouts if supported
 *
 * When MX_HAS_RAW_SET_NIC_REPLY_INFO is defined, passes blob/size to
 * mx_raw_set_nic_reply_info() and returns 0 on success or -1 on failure.
 * Without it the call does nothing and returns 0.
 */
int
myri_set_nic_reply_info(
  int nic_id,
  void *blob,
  int size)
{
#ifndef MX_HAS_RAW_SET_NIC_REPLY_INFO
  return 0;
#else
  struct myri_mx *mip = &Myri[nic_id];

  if (mx_raw_set_nic_reply_info(mip->raw_ep, blob, size) != MX_SUCCESS) {
    return -1;
  }
  return 0;
#endif
}
